
// Build a lookup with one row per sitc3_rev3 code holding that code's zeta.
clear

cd "D:\data_replication"

use data\firm_size_distribution\Chaney_Gamma_Sigma_REV.dta, clear
// Stata orders tied observations randomly unless -stable- is specified.
// A stable sort keeps ties in file order, so the row retained below (and
// therefore the saved zeta) is the same on every run even if zeta is not
// constant within a sitc3_rev3 code.
sort sitc3_rev3, stable
by sitc3_rev3: keep if _n == 1
keep sitc3_rev3 zeta
save data\firm_size_distribution\zeta_sitc3.dta, replace

// Map each hs6 code to the mean zeta of the sitc3_rev3 codes it is linked to
// in the HS07/SITC3 conversion table, and save the result as zeta_hs6.dta.
clear

// Only spreadsheet columns C and D are used: C becomes hs6, D becomes sitc3.
import excel data\firm_size_distribution\crosswalk_HS07_SITC3.xlsx, sheet("Annex 2 Conversion table")
keep C D
rename (C D) (hs6 sitc3)
drop if _n < 9    // discard the first eight rows (presumably sheet headers - confirm)

// Strip the dots from the hs6 codes.
replace hs6 = subinstr(hs6, ".", "", .)

// Remove the rows whose sitc3 entry is the literal "I" or "II".
sort sitc3
drop if inlist(sitc3, "I", "II")

// The merge key is the first three characters of the SITC code, as a number.
generate sitc3_rev3 = substr(sitc3, 1, 3)
destring sitc3_rev3, replace

// Keep one row per (sitc3_rev3, hs6) pair.
order sitc3_rev3 hs6
bysort sitc3_rev3 hs6: keep if _n == 1

// Bring in zeta by sitc3_rev3, then average it within each hs6 code.
merge m:1 sitc3_rev3 using data\firm_size_distribution\zeta_sitc3.dta
bysort hs6: egen zeta_hs6 = mean(zeta)
by hs6: keep if _n == 1
drop if hs6 == ""    // rows without an hs6 code (e.g. using-only rows from the merge)
keep hs6 zeta_hs6
save data\firm_size_distribution\zeta_hs6.dta, replace



// 2007 - Concord Intermediate to pc8plus
//------------------------------------------------------------------------------



// Start from the 2007 cn8-to-pc8 crosswalk; the hs6 key is the first six
// characters of the cn8 code.
use data\crosswalks\cn8_to_pc8\crosswalk_cn8_pc8_2007, clear
generate hs6 = substr(cn8_string, 1, 6)

// Attach zeta_hs6 and keep crosswalk rows only. hs6 codes that appear solely
// in the zeta file are discarded (original author's note: 416 such codes).
merge m:1 hs6 using data\firm_size_distribution\zeta_hs6.dta, keep(master match)

// Average zeta_hs6 within each pc8 code and reduce to one row per code.
bysort pc8_string: egen zeta_pc8 = mean(zeta_hs6)
by pc8_string: keep if _n == 1

// Shape the result for the prccode/year merges that follow.
keep pc8_string zeta_pc8
rename pc8_string prccode
generate year = 2007

// Assign a pc8plus code to every prccode/year row using three crosswalks in
// turn. On matched rows, each later crosswalk overrides the code assigned by
// the earlier ones. Rows found only in a crosswalk are never kept.

// Step 1: pc8 -> pc8plus crosswalk.
merge m:1 prccode year using data\crosswalks\pc8_over_time\crosswalk_pc8_pc8plus_2003_2007_temp.dta, keep(master match)    // using-only rows: products not in dataset
generate match = cond(_merge == 1, 1, 3)    // 1 = no crosswalk entry, 3 = matched
drop _merge
generate pc8plus_temp = pc8plus
drop pc8plus

// Step 2: first set of manually matched pc8plus codes.
merge m:1 prccode year using data\crosswalks\pc8_over_time\manually_matched_pc8_over_time.dta, keep(master match)
replace pc8plus_temp = pc8plus if _merge == 3
drop _merge pc8plus

// Step 3: second set of manually matched pc8plus codes.
merge m:1 prccode year using data\crosswalks\pc8_over_time\manually_matched_pc8_over_time_2.dta, keep(master match)
replace pc8plus_temp = pc8plus if _merge == 3
drop exit synthetic match _merge pc8plus

// The working column becomes the final pc8plus.
rename pc8plus_temp pc8plus


// Average zeta_pc8 within each pc8plus code and reduce to one row per code.
bysort pc8plus: egen zeta_pc8plus = mean(zeta_pc8)
by pc8plus: keep if _n == 1

keep pc8plus zeta_pc8plus

// Rows without a pc8plus code are not part of the output.
drop if pc8plus == ""

// Codes with no zeta receive the median zeta across the remaining codes.
egen zeta_median = median(zeta_pc8plus)
replace zeta_pc8plus = zeta_median if missing(zeta_pc8plus)
drop zeta_median

// Write the outputs once, after cleaning. An earlier save to the same file
// wrote the data before blank codes were dropped and missing values imputed,
// only to be overwritten here; it left an uncleaned file on disk if the
// script stopped in between.
save data\firm_size_distribution\zeta_pc8plus.dta, replace
export delimited using data\firm_size_distribution\zeta_pc8plus.csv, replace

